#include "stdinc.h"
#include "newhash.h"
#include "extfunc.h"
#include "extvab.h"

#define PUBLIC_FUNC
#define PROTECTED_FUNC

static const kmer_t empty_kmer = {0, 0, 0, 0, 0, 0, 1, 0,0};

static inline void update_kmer(kmer_t *mer, ubyte left, ubyte right){
	ubyte4 cov;

	if(left<4){
		cov = get_kmer_left_cov(*mer, left);
		if(cov < MAX_KMER_COV){
			set_kmer_left_cov(*mer, left, cov + 1);
		}
	}

	if(right<4){
		cov = get_kmer_right_cov(*mer, right);
		if(cov < MAX_KMER_COV){
			set_kmer_right_cov(*mer, right, cov + 1);
		}
	}
}

static inline void set_new_kmer(kmer_t *mer, ubyte8 seq, ubyte left, ubyte right){
	*mer = empty_kmer;
	set_kmer_seq(*mer, seq);
	if(left<4)
		set_kmer_left_cov(*mer, left, 1);
	if(right<4)
		set_kmer_right_cov(*mer, right, 1);
}


static inline int is_prime_kh(ubyte8 num){
	ubyte8 i, max;
	if(num < 4) return 1;
	if(num % 2 == 0) return 0;
	max = (ubyte8)sqrt((float)num);
	for(i=3;i<max;i+=2){ if(num % i == 0) return 0; }
	return 1;
}

static inline ubyte8 find_next_prime_kh(ubyte8 num){
	if(num % 2 == 0) num ++;
	while(1){ if(is_prime_kh(num)) return num; num += 2; }
}

PUBLIC_FUNC KmerSet* init_kmerset(ubyte8 init_size, float load_factor){
	KmerSet *set;
	if(init_size < 3) init_size = 3;
	else init_size = find_next_prime_kh(init_size);
	set = (KmerSet*)malloc(sizeof(KmerSet));
	set->size   = init_size;
	set->count  = 0;

	set->searchCnt = 0;
	set->foundCnt = 0;
	set->delCnt = 0;
	set->searchSpcSeedCnt = 0;
	set->getSpcSeedCnt = 0;
	set->levelGet[0] = 0;
	set->levelGet[1] = 0;
	set->levelGet[2] = 0;	
	
	set->max    = set->size * load_factor;
	if(load_factor <= 0) load_factor = 0.25f;
	else if(load_factor >= 1) load_factor = 0.75f;
	set->load_factor = load_factor;
	set->iter_ptr    = 0;
	set->array = calloc(set->size, sizeof(kmer_t));
	set->flags = malloc((set->size + 15)/16 * 4);
	memset(set->flags, 0x55, (set->size + 15) / 16 * 4);
	return set;
}

PROTECTED_FUNC static inline ubyte8 get_kmerset(KmerSet *set, ubyte8 seq){
	ubyte8 hc;
	hc = seq % set->size;
	while(1){
		if(is_kmer_entity_null(set->flags, hc)){
			return hc;
		} else {
			if(get_kmer_seq(set->array[hc]) == seq) return hc;
		}
		hc ++;
		if(hc == set->size) hc = 0;
	}
	return set->size;
}

PUBLIC_FUNC int search_kmerset(KmerSet *set, ubyte8 seq, kmer_t **rs){
	ubyte8 hc;
	hc = seq % set->size;
	while(1){
		if(is_kmer_entity_null(set->flags, hc)){
			return 0;
		} else {
			if(get_kmer_seq(set->array[hc]) == seq){
				*rs = set->array + hc;
				return 1;
			}
		}
		hc ++;
		if(hc == set->size) hc = 0;
	}
	return 0;
}

PUBLIC_FUNC static inline int exists_kmerset(KmerSet *set, ubyte8 seq){ 
	ubyte8 idx;
	idx = get_kmerset(set, seq);
	return !is_kmer_entity_null(set->flags, idx);
}

PROTECTED_FUNC static inline void encap_kmerset(KmerSet *set, ubyte8 num){
	ubyte4 *flags, *f;
	ubyte8 i, n, size, hc;
	kmer_t key, tmp;
	if(set->count + num <= set->max) return;
	n = set->size;
	do{
		if(n < 0xFFFFFFFU)
			n <<= 1;
		else
			n += 0xFFFFFFU;
		n = find_next_prime_kh(n); 
	} while(n * set->load_factor < set->count + num);

	set->array = realloc(set->array, n * sizeof(kmer_t));
	if(set->array == NULL){
		fprintf(stderr, "-- Out of memory --\n");
		abort();
	}
	flags = malloc((n+15)/16 * 4);
	memset(flags, 0x55, (n+15)/16 * 4);
	size = set->size;
	set->size = n;
	set->max = n * set->load_factor;
	f = set->flags;
	set->flags = flags;
	flags = f;
	for(i=0;i<size;i++){
		if(!exists_kmer_entity(flags, i)) continue;
		key = set->array[i];
		set_kmer_entity_del(flags, i);
		while(1){
			hc = get_kmer_seq(key) % set->size;
			while(!is_kmer_entity_null(set->flags, hc)){ hc ++; if(hc == set->size) hc = 0; }
			clear_kmer_entity_null(set->flags, hc);
			if(hc < size && exists_kmer_entity(flags, hc)){ 
				tmp = key;
				key = set->array[hc];
				set->array[hc] = tmp;
				set_kmer_entity_del(flags, hc);
			} else {
				set->array[hc] = key;
				break;
			}
		}
	}
	free(flags);
}

PUBLIC_FUNC int put_kmerset(KmerSet *set, ubyte8 seq, ubyte left, ubyte right, kmer_t **kmer_p){
	ubyte8 hc;
	encap_kmerset(set, 1);
	hc = seq % set->size;
	do{
		if(is_kmer_entity_null(set->flags, hc)){
			clear_kmer_entity_null(set->flags, hc);
			set_new_kmer(set->array + hc, seq, left, right);
			set->count ++;
			*kmer_p = set->array + hc;
			return 0;
		} else {
			if(get_kmer_seq(set->array[hc]) == seq){
				update_kmer(set->array + hc, left, right);
				set->array[hc].single = 0;
				*kmer_p = set->array + hc;
				return 1;
			}
		}
		hc ++;
		if(hc == set->size) hc = 0;
	} while(1);
	*kmer_p = NULL;
	return 0;
}

PUBLIC_FUNC byte8 count_kmerset(KmerSet *set){ return set->count; }

PUBLIC_FUNC static inline void reset_iter_kmerset(KmerSet *set){ set->iter_ptr = 0; }

PUBLIC_FUNC static inline ubyte8 iter_kmerset(KmerSet *set, kmer_t **rs){
	while(set->iter_ptr < set->size){
		if(!is_kmer_entity_null(set->flags, set->iter_ptr)){
			*rs = set->array + set->iter_ptr;
			set->iter_ptr ++;
			return 1;
		}
		set->iter_ptr ++;
	}
	return 0;
}

PUBLIC_FUNC void free_kmerset(KmerSet *set){
	free(set->array);
	free(set->flags);
	free(set);
}

PUBLIC_FUNC  void free_Sets(KmerSet **sets,int num){
	int i;
	for(i=0;i<num;i++)
		free_kmerset(sets[i]);
	free((void*)sets);
}

int count_branch2prev(kmer_t *node)
{
	int num = 0,i;

	for(i=0;i<4;i++){
		if(get_kmer_left_cov(*node,i)>0)
			num++;
	}
	return num;
}

int count_branch2next(kmer_t *node)
{
	int num = 0,i;

	for(i=0;i<4;i++){
		if(get_kmer_right_cov(*node,i)>0)
			num++;
	}
	return num;
}

void dislink2prevUncertain(kmer_t *node,char ch,boolean smaller)
{
	if(smaller)
		set_kmer_left_cov(*node,ch,0);
	else
		set_kmer_right_cov(*node,int_comp(ch),0);
	
}

void dislink2nextUncertain(kmer_t *node,char ch,boolean smaller)
{
	if(smaller)
		set_kmer_right_cov(*node,ch,0);
	else
		set_kmer_left_cov(*node,int_comp(ch),0);
}
	





////////////////// functions for spaced seed Kmer hash

static const spcKmer empty_spckmer = {0, NULL, 1};

static inline int update_spckmer(spcKmer *mer, ubyte2 s_bases, kmer_t *node){
//	if(mer->start == NULL)
//		fprintf(stderr, "start err at:\t%llu\n",mer->seq);
		
	spcBase *tmpBase=mer->start;

	spcBase *newSpcBase;
	newSpcBase = (spcBase*)malloc(sizeof(spcBase));
	newSpcBase->spaced_bases = s_bases;
//	newSpcBase->edgeID = edgeID;
	newSpcBase->large_kmer = node;
	newSpcBase->next = tmpBase->next;
	tmpBase->next = newSpcBase;
	
	mer->spaced_base_num++;
	
//	mvnv(0,"update %llu :\t%hu\tnum: %u\n", mer->seq, tmpBase->next->spaced_bases, mer->spaced_base_num);
	return 0;
}

static inline void set_new_spckmer(spcKmer *mer, Kmer spc_kmer, ubyte2 s_bases, kmer_t *node){
	*mer = empty_spckmer;
	set_kmer_seq(*mer, spc_kmer);
	
	spcBase *newSpcBase;
	newSpcBase = (spcBase*)malloc(sizeof(spcBase));
	newSpcBase->spaced_bases = s_bases;
//	newSpcBase->repeat = 0;
//	newSpcBase->edgeID = edgeID;
	newSpcBase->large_kmer = node;
	newSpcBase->next = NULL;
	
	mer->start = newSpcBase;
	
//	mvnv(0,"new %llu :\t%hu\n", mer->seq, mer->start->spaced_bases)
	
}

PUBLIC_FUNC spcKmerSet* init_spckmerset(ubyte8 init_size, float load_factor){
	spcKmerSet *set;
	if(init_size < 3) init_size = 3;
	else init_size = find_next_prime_kh(init_size);

	set = (spcKmerSet*)malloc(sizeof(spcKmerSet));
	set->size   = init_size;
	set->count  = 0;
	set->max    = set->size * load_factor;
	if(load_factor <= 0) load_factor = 0.25f;
	else if(load_factor >= 1) load_factor = 0.75f;
	set->load_factor = load_factor;
	//set->iter_ptr    = 0;
	set->array = calloc(set->size, sizeof(spcKmer));
	set->flags = malloc((set->size + 15)/16 * 4);
	memset(set->flags, 0x55, (set->size + 15) / 16 * 4);
	return set;
}

PUBLIC_FUNC int search_spckmerset(spcKmerSet *set, ubyte8 seq, spcKmer **rs){
	ubyte8 hc;
	hc = seq % set->size;
	while(1){
		if(is_kmer_entity_null(set->flags, hc)){
			return 0;
		} else {
			if(get_kmer_seq(set->array[hc]) == seq){
				*rs = set->array + hc;
				return 1;
			}
		}
		hc ++;
		if(hc == set->size) hc = 0;
	}
	return 0;
}

PROTECTED_FUNC static inline void encap_spckmerset(spcKmerSet *set, ubyte8 num){
	ubyte4 *flags, *f;
	ubyte8 i, n, size, hc;
	spcKmer key, tmp;
	if(set->count + num <= set->max) return;

	n = set->size;
	do{
		if(n < 0xFFFFFFFU)
			n <<= 1;
		else
			n += 0xFFFFFFU;
		n = find_next_prime_kh(n); 
	} while(n * set->load_factor < set->count + num);

	set->array = realloc(set->array, n * sizeof(spcKmer));
	if(set->array == NULL){
		fprintf(stderr, "-- Out of memory --\n");
		abort();
	}
	flags = malloc((n+15)/16 * 4);
	memset(flags, 0x55, (n+15)/16 * 4);
	size = set->size;
	set->size = n;
	set->max = n * set->load_factor;
	f = set->flags;
	set->flags = flags;
	flags = f;
	for(i=0;i<size;i++){
		if(!exists_kmer_entity(flags, i)) continue;
		key = set->array[i];
		set_kmer_entity_del(flags, i);
		while(1){
			hc = get_kmer_seq(key) % set->size;
			while(!is_kmer_entity_null(set->flags, hc)){ hc ++; if(hc == set->size) hc = 0; }
			clear_kmer_entity_null(set->flags, hc);
			if(hc < size && exists_kmer_entity(flags, hc)){ 
				tmp = key;
				key = set->array[hc];
				set->array[hc] = tmp;
				set_kmer_entity_del(flags, hc);
			} else {
				set->array[hc] = key;
				break;
			}
		}
	}
	free(flags);
}

PUBLIC_FUNC int put_spckmerset(spcKmerSet *set, Kmer spc_kmer, ubyte2 spaced_bases, kmer_t *node){
	ubyte8 hc;
	encap_spckmerset(set, 1);
	hc = spc_kmer % set->size;
	do{
		if(is_kmer_entity_null(set->flags, hc)){		//new! repeat_flag==0
			clear_kmer_entity_null(set->flags, hc);
			set_new_spckmer(set->array + hc, spc_kmer, spaced_bases, node);
			set->count ++;
			return 0;
		} else {
			if(get_kmer_seq(set->array[hc]) == spc_kmer){	//exists! repeat_flag==1 or 0
				return update_spckmer(set->array + hc, spaced_bases, node);
			}
		}
		hc ++;
		if(hc == set->size) hc = 0;
	} while(1);
	return 3;
}

PUBLIC_FUNC void buildSpcKmerSet(KmerSet *set, spcKmerSet *spaced_kset)
{
	boolean spcFlag;
	Kmer buff_kmer, spc_kmer;
	ubyte2 spc_bases;
	
	ubyte8 i=0; // ,j=0;
	for(i=0;i<set->size;i++)
	{
		if(is_kmer_entity_null(set->flags, i))
			continue;
		else
		{
//			kmer_t **kmer_p;
//			*kmer_p = set->array+i;
			if(set->array[i].deleted != 1)	//kmer not repeat
			{
				//spaced seed: 18 of 25, build masker and use >>,&,| for each part, only assign once
				//	1 1111 1010 1100 1111 1101 0110			!!!OLD!!!
				//	1 1111 1111 1111 1010 1100 1000			!!!NEW!!!
				//	11 11111111 11111111 11111111 11001100 11110000 11000000			!!!NEW!!!				
				
				buff_kmer = get_kmer_seq(set->array[i]);
				
				spc_kmer = ((buff_kmer>>14)&0xFFFFFFF00) | ((buff_kmer>>12)&0xC0) | ((buff_kmer>>10)&0x3C) | ((buff_kmer>>6)&0x3);
				//0xFFFFFFF00 = 1111 11111111 11111111 11111111 00000000
				//			 0xC0 = 0000 00000000 00000000 00000000 11000000
				//			 0x3C = 0000 00000000 00000000 00000000 00111100
				//				0x3 = 0000 00000000 00000000 00000000 00000011
				
				spc_bases = ((buff_kmer>>8)&0x3000) | ((buff_kmer>>6)&0xC00) | ((buff_kmer>>2)&0x3C0) | (buff_kmer&0x3F);
				//		 0x3000 = 110000 00000000
				//			0xC00 = 001100 00000000
				//			0x3C0 = 000011 11000000
				//			 0x3F = 000000 00111111

				//build the 18mer and the spaced bases(7mer), put them in the spaced_kmer hash				
				spcFlag = put_spckmerset(spaced_kset, spc_kmer, spc_bases, set->array+i);
				if(spcFlag!=0)
					fprintf(stderr, "flag error: %c\tkmer exists: %llu %hu\n", spcFlag, spc_kmer, spc_bases);
//				if((++j)%100000==0)
//					fprintf(stderr,"--- %lluth spaced Kmer built\n",j);		
			}
			
		}
	}
	//fprintf(stderr,"--- total %llu spaced Kmer built in a KmerSet\n",j);	
}
